The MongoDB Aggregation Framework Final Exam Quiz Answer
In this article i am gone to share MongoDB-University
Course: The MongoDB Aggregation Framework Final Exam Quiz
Answer with you..
Final Exam Quiz
Question 1)
Consider the following aggregation pipelines:
Pipeline 1
db.coll.aggregate([
{"$match": {"field_a": {"$gt": 1983}}},
{"$project": { "field_a": "$field_a.1", "field_b": 1, "field_c":
1 }},
{"$replaceRoot":{"newRoot": {"_id": "$field_c", "field_b":
"$field_b"}}},
{"$out": "coll2"},
{"$match": {"_id.field_f": {"$gt": 1}}},
{"$replaceRoot":{"newRoot": {"_id": "$field_b", "field_c":
"$_id"}}}
])
Pipeline 2
db.coll.aggregate([
{"$match": {"field_a": {"$gt": 111}}},
{"$geoNear": {
"near": { "type": "Point", "coordinates": [ -73.99279 ,
40.719296 ] },
"distanceField": "distance"}},
{"$project": { "distance": "$distance", "name": 1, "_id": 0
}}
])
Pipeline 3
db.coll.aggregate([
{
"$facet": {
"averageCount": [
{"$unwind": "$array_field"},
{"$group": {"_id": "$array_field", "count":
{"$sum": 1}}}
],
"categorized": [{"$sortByCount":
"$arrayField"}]
},
},
{
"$facet": {
"new_shape": [{"$project": {"range":
"$categorized._id"}}],
"stats": [{"$match": {"range": 1}}, {"$indexStats":
{}}]
}
}
])
Which of the following statements are correct?
- Pipeline 3 executes correctly
- Pipeline 2 fails because we cannot project distance field
- Pipeline 3 fails since you can only have one $facet stage per pipeline
- Pipeline 1 is incorrect because you can only have one $replaceRoot stage in your pipeline
- Pipeline 1 fails since $out is required to be the last stage of the pipeline
- Pipeline 2 is incorrect because $geoNear needs to be the first stage of our pipeline
- Pipeline 3 fails because $indexStats must be the first stage in a pipeline and may not be used within a $facet
Question 2)
Consider the following collection:
db.collection.find()
{
"a": [1, 34, 13]
}
The following pipelines are executed on top of this collection, using a
mixed set of different expression accross the different stages:
Pipeline 1
db.collection.aggregate([
{"$match": { "a" : {"$sum": 1} }},
{"$project": { "_id" : {"$addToSet": "$a"} }},
{"$group": { "_id" : "", "max_a": {"$max": "$_id"} }}
])
Pipeline 2
db.collection.aggregate([
{"$project": { "a_divided" : {"$divide": ["$a", 1]}
}}
])
Pipeline 3
db.collection.aggregate([
{"$project": {"a": {"$max": "$a"}}},
{"$group": {"_id": "$$ROOT._id", "all_as": {"$sum":
"$a"}}}
])
Given these pipelines, which of the following statements are
correct?
- Pipeline 3 is correct and will execute with no error
- Pipeline 2 fails because the $divide operator only supports numeric types
- Pipeline 1 will fail because $max can not operator on _id field
- Pipeline 2 is incorrect since $divide cannot operate over field expressions
- Pipeline 1 is incorrect because you cannot use an accumulator expression in a $match stage.
Question 3)
Consider the following collection documents:
db.people.find()
{ "_id" : 0, "name" : "Bernice Pope", "age" : 69, "date" :
ISODate("2017-10-04T18:35:44.011Z") }
{ "_id" : 1, "name" : "Eric Malone", "age" : 57, "date" :
ISODate("2017-10-04T18:35:44.014Z") }
{ "_id" : 2, "name" : "Blanche Miller", "age" : 35, "date" :
ISODate("2017-10-04T18:35:44.015Z") }
{ "_id" : 3, "name" : "Sue Perez", "age" : 64, "date" :
ISODate("2017-10-04T18:35:44.016Z") }
{ "_id" : 4, "name" : "Ryan White", "age" : 39, "date" :
ISODate("2017-10-04T18:35:44.019Z") }
{ "_id" : 5, "name" : "Grace Payne", "age" : 56, "date" :
ISODate("2017-10-04T18:35:44.020Z") }
{ "_id" : 6, "name" : "Jessie Yates", "age" : 53, "date" :
ISODate("2017-10-04T18:35:44.020Z") }
{ "_id" : 7, "name" : "Herbert Mason", "age" : 37, "date" :
ISODate("2017-10-04T18:35:44.020Z") }
{ "_id" : 8, "name" : "Jesse Jordan", "age" : 47, "date" :
ISODate("2017-10-04T18:35:44.020Z") }
{ "_id" : 9, "name" : "Hulda Fuller", "age" : 25, "date" :
ISODate("2017-10-04T18:35:44.020Z") }
And the aggregation pipeline execution result:
db.people.aggregate(pipeline)
{ "_id" : 8, "names" : [ "Sue Perez" ], "word" : "P" }
{ "_id" : 9, "names" : [ "Ryan White" ], "word" : "W" }
{ "_id" : 10, "names" : [ "Eric Malone", "Grace Payne" ], "word" : "MP"
}
{ "_id" : 11, "names" : [ "Bernice Pope", "Jessie Yates", "Jesse
Jordan", "Hulda Fuller" ], "word" : "PYJF" }
{ "_id" : 12, "names" : [ "Herbert Mason" ], "word" : "M" }
{ "_id" : 13, "names" : [ "Blanche Miller" ], "word" : "M" }
Which of the following pipelines generates the output result?
var pipeline = [{
"$project": {
"surname": { "$arrayElemAt": [ {"$split": [
"$name", " " ] }, 1]},
"name_size": { "$add" : [{"$strLenCP":
"$name"}, -1]},
"name":1
}
},
{
"$group": {
"_id": "$name_size",
"word": { "$addToSet": {"$substr":
[{"$toUpper":"$name"}, 3, 2]} },
"names": {"$push": "$surname"}
}
},
{
"$sort": {"_id": -1}
}
]
```
```
[X]
var pipeline = [{
"$project": {
"surname_capital": { "$substr": [{"$arrayElemAt":
[ {"$split": [ "$name", " " ] }, 1]}, 0, 1 ] },
"name_size": { "$add" : [{"$strLenCP":
"$name"}, -1]},
"name": 1
}
},
{
"$group": {
"_id": "$name_size",
"word": { "$push": "$surname_capital" },
"names": {"$push": "$name"}
}
},
{
"$project": {
"word": {
"$reduce": {
"input": "$word",
"initialValue": "",
"in": { "$concat": ["$$value",
"$$this"] }
}
},
"names": 1
}
},
{
"$sort": { "_id": 1}
}
]
```
```
var pipeline = [{
"$sort": { "date": 1 }
},
{
"$group": {
"_id": { "$size": { "$split": ["$name", " "]}
},
"names": {"$push": "$name"}
}
},
{
"$project": {
"word": {
"$zip": {
"inputs": ["$names"],
"useLongestLength": false,
}
},
"names": 1
}
}]
```
___________________________________________________________________________
Question 4)
$facet is an aggregation stage that allows for sub-pipelines to be
executed.
var pipeline = [
{
$match: { a: { $type: "int" } }
},
{
$project: {
_id: 0,
a_times_b: { $multiply: ["$a", "$b"] }
}
},
{
$facet: {
facet_1: [{ $sortByCount: "a_times_b" }],
facet_2: [{ $project: { abs_facet1: { $abs:
"$facet_1._id" } } }],
facet_3: [
{
$facet: {
facet_3_1: [{ $bucketAuto:
{ groupBy: "$_id", buckets: 2 } }]
}
}
]
}
}
]
In the above pipeline, which uses $facet, there are some incorrect
stages or/and expressions being used.
Which of the following statements point out errors in the
pipeline?
- a $multiply expression takes a document as input, not an array.
- can not nest a $facet stage as a sub-pipeline.
- $sortByCount cannot be used within $facet stage.
- facet_2 uses the output of a parallel sub-pipeline, facet_1, to compute an expression
- a $type expression does not take a string as its value; only the BSON numeric values can be specified to identify the types.
Question 5)
Consider a company producing solar panels and looking for the next
markets they want to target in the USA. We have a collection with all
the major cities (more than 100,000 inhabitants) from all over the
World with recorded number of sunny days for some of the last
years.
A sample document looks like the following:
db.cities.findOne()
{
"_id": 10,
"city": "San Diego",
"region": "CA",
"country": "USA",
"sunnydays": [220, 232, 205, 211, 242, 270]
}
The collection also has these indexes:
db.cities.getIndexes()
[
{
"v": 2,
"key": {
"_id": 1
},
"name": "_id_",
"ns": "test.cities"
},
{
"v": 2,
"key": {
"city": 1
},
"name": "city_1",
"ns": "test.cities"
},
{
"v": 2,
"key": {
"country": 1
},
"name": "country_1",
"ns": "test.cities"
}
]
We would like to find the cities in the USA where the minimum number of
sunny days is 200 and the average number of sunny days is at least 220.
Lastly, we'd like to have the results sorted by the city's name. The
matching documents may or may not have a different shape than the
initial one.
We have the following query:
var pipeline = [
{"$addFields": { "min": {"$min": "$sunnydays"}}},
{"$addFields": { "mean": {"$avg": "$sunnydays" }}},
{"$sort": {"city": 1}},
{"$match": { "country": "USA", "min": {"$gte": 200},
"mean": {"$gte": 220}}},
]
db.cities.aggregate(pipeline)
However, this pipeline execution can be optimized!
Which of the following choices is still going to produce the expected
results and likely improve the most the execution of this aggregation
pipeline?
var pipeline = [
{"$sort": {"city": 1}},
{"$addFields": { "min": {"$min": "$sunnydays"}}},
{"$match": { "country": "USA", "min": {"$gte":
200}}},
]
var pipeline = [
{"$match": { "country": "USA"}},
{"$sort": {"city": 1}},
{"$addFields": { "min": {"$min": "$sunnydays"}}},
{"$match": { "min": {"$gte": 200}, "mean": {"$gte":
220}}},
{"$addFields": { "mean": {"$avg": "$sunnydays"
}}},
]
var pipeline = [
{"$sort": {"city": 1}},
{"$addFields": { "min": {"$min": "$sunnydays"}}},
{"$addFields": { "mean": {"$avg": "$sunnydays"
}}},
{"$match": { "country": "USA", "min": {"$gte": 200},
"mean": {"$gte": 220}}},
]
var pipeline = [
{"$sort": {"city": 1}},
{"$match": { "country": "USA"}},
{"$addFields": { "min": {"$min": "$sunnydays"}}},
{"$match": { "min": {"$gte": 200}, "mean": {"$gte":
220}}},
{"$addFields": { "mean": {"$avg": "$sunnydays"
}}},
]
var pipeline = [
{"$match": { "country": "USA"}},
{"$addFields": { "mean": {"$avg":
"$sunnydays"}}},
{"$match": { "mean": {"$gte": 220}, "sunnydays":
{"$not": {"$lt": 200 }}}},
{"$sort": {"city": 1}},
]
Question 6)
Consider the following people collection:
db.people.find().limit(5)
{ "_id" : 0, "name" : "Iva Estrada", "age" : 95, "state" : "WA", "phone"
: "(739) 557-2576", "ssn" : "901-34-4492" }
{ "_id" : 1, "name" : "Roger Walton", "age" : 92, "state" : "ID",
"phone" : "(948) 527-2370", "ssn" : "498-61-9106" }
{ "_id" : 2, "name" : "Isaiah Norton", "age" : 26, "state" : "FL",
"phone" : "(344) 479-5646", "ssn" : "052-49-6049" }
{ "_id" : 3, "name" : "Tillie Salazar", "age" : 88, "state" : "ND",
"phone" : "(216) 414-5981", "ssn" : "708-26-3486" }
{ "_id" : 4, "name" : "Cecelia Wells", "age" : 16, "state" : "SD",
"phone" : "(669) 809-9128", "ssn" : "977-00-7372" }
And the corresponding people_contacts view:
db.people_contacts.find().limit(5)
{ "_id" : 6585, "name" : "Aaron Alvarado", "phone" :
"(631)*********", "ssn" : "********8014" }
{ "_id" : 8510, "name" : "Aaron Barnes", "phone" : "(944)*********",
"ssn" : "********6820" }
{ "_id" : 6441, "name" : "Aaron Barton", "phone" : "(234)*********",
"ssn" : "********1937" }
{ "_id" : 8180, "name" : "Aaron Coleman", "phone" : "(431)*********",
"ssn" : "********7559" }
{ "_id" : 9738, "name" : "Aaron Fernandez", "phone" :
"(578)*********", "ssn" : "********0211" }
Which of the of the following commands generates this people_contacts
view?
var pipeline = [
{
"$sort": {"name": 1}
},
{
"$project": {"name":1,
"phone": {
"$concat": [
{"$arrayElemAt": [{"$split": ["$phone", "
"]}, 0]} ,
"*********" ]
},
"ssn": {
"$concat": [
"********",
{"$arrayElemAt": [{"$split": ["$ssn",
"-"]}, 2]}
]
}
}
}
];
db.createView("people", "people_contacts" pipeline);
var pipeline = [
{
"$project": {"name":1,
"phone": {
"$concat": [
{"$arrayElemAt": [{"$split": ["$phone", "
"]}, 0]} ,
"*********" ]
},
"ssn": {
"$concat": [
"********",
{"$arrayElemAt": [{"$split": ["$ssn",
"-"]}, 2]}
]
}
}
}
];
db.runCommand({
"create": "people_contacts",
"viewOn":"people",
"pipeline": pipeline})
var pipeline = [
{
"$sort": {"state": 1}
},
{
"$project": {"name":1,
"phone": {
"$concat": [
{"$arrayElemAt": [{"$split": ["$phone", "
"]}, 0]} ,
"*********" ]
},
"ssn": {
"$concat": [
"********",
{"$arrayElemAt": [{"$split": ["$ssn",
"-"]}, 2]}
]
}
}
}
];
db.runCommand({
"create": "people",
"viewOn":"people",
"pipeline": pipeline})
var pipeline = [
{
"$sort": {"name": 1}
},
{
"$project": {"name":1,
"phone": {
"$concat": [
{"$arrayElemAt": [{"$split": ["$phone", "
"]}, 0]} ,
"*********" ]
},
"ssn": {
"$concat": [
"********",
{"$arrayElemAt": [{"$split": ["$ssn",
"-"]}, 2]}
]
}
}
}
];
db.createView("people_contacts", "people", pipeline);
Question 7)
Using the air_alliances and air_routes collections, find which
alliance has the most unique carriers(airlines) operating between the
airports JFK and LHR, in both directions.
Names are distinct, i.e. Delta != Delta Air Lines
src_airport and dst_airport contain the originating and terminating
airport information.
- SkyTeam, with 4 carriers
- OneWorld, with 8 carriers
- OneWorld, with 4 carriers
- Star Alliance, with 6 carriers
________________________________________________________________________________
0 Comments